********************************************************************************
/* Cleaning raw data               

	***DTA OUTPUT (paths relative to the working directory set below): 
	- DATA/cjoint_wide.dta
	- DATA/for_R/summary_stats.dta 
	- DATA/for_R/stereotypes.dta
*/
********************************************************************************

* Start from an empty session: drop the data in memory, matrices and Mata objects
clear
clear matrix
clear mata

* Session settings
set more off        // do not pause the output for --more--
set mem 5000m       // memory request; only meaningful on old Stata releases
set maxvar 20000    // raise the limit on the number of variables

* All relative paths used below (DATA/...) are resolved from this folder.
* NOTE(review): machine-specific absolute path - adjust before running elsewhere.
cd "/Users/georgemelios/Downloads/PSRM_Replication 3"
use "DATA/cjoint_raw.dta", clear

********************************************************************************
**# Demographics
********************************************************************************

* Gender: convert the raw answer to numeric and attach the answer labels

destring gender, replace force
label define gender ///
	1  "Male" ///
	2  "Female" ///
	3  "Non-binary" ///
	99 "Refused"
label values gender gender

* Binary indicator: 1 = male, 0 = female, any other answer is set to missing
recode gender ///
	(1 = 1 "Male") ///
	(2 = 0 "Female") ///
	(else = .), generate(male_female)

* Age

destring age, replace force // 18 to 40

* Relationship status

destring rel, replace force

* NOTE(review): this moves every answer coded 4 to 3, so code 4 (labelled
* Married below) is unused afterwards - confirm that this is intended.
replace rel = 3 if rel == 4 // demographic data of pid indicates engaged

rename rel relationship
label define relationship ///
	1  "Single, never married" ///
	2  "In a relationship" ///
	3  "Engaged" ///
	4  "Married" ///
	5  "Separated/divorced" ///
	6  "Widowed" ///
	99 "Prefer not to say"
label values relationship relationship
label list relationship

* Two-category version: single (codes 1, 5-6) versus partnered (codes 2-4);
* everything else, including 99, becomes missing
recode relationship ///
	(1 5/6 = 1 "Single") ///
	(2 3/4 = 2 "In a relationship") ///
	(else = .), generate(rel_short)

* Orientation

destring orientation, replace force
label define orientation 1 "Men" 2 "Women" 5 "No preference"
label values orientation orientation

* which_set identifies the choice sets a respondent completed.
* Respondents with no preference (orientation = 5) are classified by the set
* that is empty: mset1 missing -> women sets (2), otherwise wset1 missing ->
* men sets (1). Everyone else simply inherits the orientation code.
generate which_set = cond(orientation == 5 & mset1 == ., 2, ///
                     cond(orientation == 5 & wset1 == ., 1, ///
                          orientation))
label define which_set 1 "men sets" 2 "women sets"
label values which_set which_set

* Education

* Convert to numeric once (the original script repeated this destring a second
* time, which is a no-op on an already numeric variable) and label the answers.
destring edu, replace force
label define edu ///
	1  "None" ///
	2  "Primary" ///
	3  "Secondary" ///
	4  "Higher/A-levels" ///
	5  "University" ///
	6  "Post-graduate" ///
	99 "Prefer not to say"
label values edu edu

* Degree indicator: codes 1-4 -> 0, codes 5-6 -> 1, anything else
* (including 99) -> missing
recode edu ///
	(1/4 = 0 "No degree") ///
	(5/6 = 1 "Degree") ///
	(else = .), generate(degree)

* Race

destring ethnic, replace force
label define ethnic ///
	1  "White" ///
	2  "Black" ///
	3  "Asian" ///
	4  "Mixed" ///
	5  "Other" ///
	99 "Prefer not to say"
label values ethnic ethnic

* One dummy per observed category (ethnic1, ethnic2, ...). The first two are
* renamed white and black, which assumes codes 1 and 2 both occur in the data.
tabulate ethnic, generate(ethnic)
rename (ethnic1 ethnic2) (white black)

* Respondents who answered 99 get missing instead of 0 on both dummies
foreach dummy of varlist white black {
	replace `dummy' = . if ethnic == 99
}

* Beauty

destring beauty_1, replace force
rename beauty_1 beauty

summarize beauty, detail // mean is 5.7, median is 6

* Median split; values outside the two ranges become missing
recode beauty ///
	(0/6  = 0 "Low")  /// at or below median
	(7/10 = 1 "High") /// above median
	(else = .), generate(beauty_short)

* Height
*
* Height was a free-text entry in two fields: height_1_1 is converted below as
* feet (x 30.48 cm) and height_1_2 as inches (x 2.54 cm). The block
*   1. hand-corrects answers that need manual interpretation,
*   2. converts the remaining feet/inches fields to centimetres,
*   3. fills the gaps from the hand-corrected values,
*   4. sets implausible heights to missing,
* and leaves a single numeric variable height (in cm).
* Statement order matters throughout: later lines rely on earlier blanking.
*
* Requires ereplace, the egen function sieve() and extremes, which are
* community-contributed commands (presumably the SSC packages ereplace,
* egenmore and extremes - confirm they are installed).

// (1) manual corrections, stored as a feet.inches string in height
gen height = ""
replace height = "5.2" if pid == "6085a4cc5f5f9ba1a3bfd785"
replace height = "5.7" if height_1_1 == "5.7" 
replace height = "5.9" if height_1_1 == "5.9" | height_1_1 == "59"
replace height = "5.6" if pid == "6403da7963bddf9eef602e4c"
replace height = "5.5" if pid == "5f526f33a80f5b51817cc53c"
replace height = "6.2" if height_1_1 == "62"
replace height = "7.3" if height_1_1 == "73"
// no feet entry -> the inches entry is unusable; then blank unusable feet entries
replace height_1_2 = "" if height_1_1 == ""
replace height_1_1 = "" if height_1_1 == "1.83" | height_1_1 == "13" | height_1_1 == "170" | height_1_1 == "272"

// rows that received a manual correction must not also be converted in step (2)
replace height_1_2 = "" if height != ""
replace height_1_1 = "" if height != ""

// clean up the inches field entry by entry
replace height_1_2 = "1" if height_1_2 == "1.2"
replace height_1_2 = "2" if height_1_2 == "I 2"
replace height_1_2 = "3" if height_1_2 == "2.9"
replace height_1_2 = "3" if height_1_2 == "3.5"
replace height_1_2 = "4" if height_1_2 == "4.5" | height_1_2 == "40"
replace height_1_2 = "5" if height_1_2 == "5.5"
replace height_1_2 = "6" if height_1_2 == "6.5"
replace height_1_2 = "7" if height_1_2 == "7.5" | height_1_2 == "72"
replace height_1_2 = "8" if height_1_2 == "8.5"
replace height_1_2 = "11" if height_1_2 == "110" | height_1_2 == "12"
replace height_1_2 = "" if height_1_2 == "160"  
replace height_1_2 = "3" if height_1_2 == "32"

// (2) convert to cm
recast str18 height_1_1
recast str18 height_1_2

ereplace height_1_1 = sieve(height_1_1), keep(numeric)  //keep numeric
ereplace height_1_2 = sieve(height_1_2), keep(numeric) 
destring height_1_1 height_1_2, force replace

replace height_1_1 = . if height_1_1 == 8  //outlier
replace height_1_1 = height_1_1*30.48
replace height_1_2 = height_1_2*2.54

// rowtotal() counts a missing part as 0, so feet without inches are kept;
// rows without a feet value are set back to missing
egen newheight = rowtotal(height_1_1 height_1_2)
replace newheight = . if newheight ==0
replace newheight = . if height_1_1 == .

// (3) fill the gaps from the manual feet.inches strings (height1 = feet, height2 = inches)
split height, p(.)

destring height1 height2, force replace
replace height1 = height1*30.48
replace height2 = height2*2.54

replace newheight = height1 + height2 if newheight == . 

// (4) set extreme outliers to missing
// The variable is referred to by its full name: the original abbreviation (new)
// only works while varabbrev is on and no other variable name starts with new.
extremes newheight
replace newheight = . if newheight < 149
replace newheight = . if newheight > 194

// newheight is already numeric here, so the former destring call was a no-op
// and has been removed. Drop every helper (height, height1, height2,
// height_1_1, height_1_2) and keep the result under the name height.
drop height*
ren newheight height

* Diet: 

destring diet, replace force
label define diet ///
	1  "Vegan" ///
	2  "Vegetarian" ///
	3  "Meat-eater" ///
	99 "Refused"
label values diet diet

* Plant-based (vegan or vegetarian) versus meat; other answers become missing
recode diet ///
	(1/2 = 1 "Plant-based") ///
	(3   = 0 "Meat") ///
	(else = .), generate(diet_short)

********************************************************************************
**# Political Questions
********************************************************************************

* Thermometer - Like - Ideology

* Convert the five raw items to numeric, then give them readable names
* (party_therm_1 becomes labour_therm, party_therm_2 becomes tory_therm)
destring party_therm_1 party_therm_2 labour_like_1 tory_like_1 ideology_1, replace force

rename (party_therm_1 party_therm_2 labour_like_1 tory_like_1 ideology_1) ///
       (labour_therm  tory_therm    labour_like   tory_like   ideology)

* Vote

destring vote, replace force
label define vote ///
	1  "I did not vote" ///
	2  "I was not eligible to vote" ///
	3  "Conservative" ///
	4  "Labour" ///
	5  "Liberal Democrat" ///
	6  "Scottish National Party" ///
	7  "Plaid Cymru" ///
	8  "UK Independence Party" ///
	9  "Green Party" ///
	10 "British National Party" ///
	11 "Other"
label values vote vote

* Generate Party Identifiers
*
* party_id: 1 = Labour, 2 = Tory, 3 = Neither, missing = no information.

// (1) start with voting: Labour (4) and Conservative (3) voters are assigned directly
recode vote (4=1 "Labour") (3=2 "Tory") (else=.), gen(party_id)
la define party_id 3 "Neither", modify

// (2) if voted for other party - or did not vote - or missing -> use thermometer
//
// Stata treats a missing value as larger than every number, so a bare
// labour_therm > tory_therm is also true when labour_therm is missing.
// The higher rating is therefore required to be non-missing; that also
// guarantees the lower one is non-missing. Respondents with exactly one
// rating missing keep a missing party_id instead of being assigned to the
// party whose rating is absent.

	//labour id = 1 if rated labour party higher and did not vote for conservatives
replace party_id = 1 if labour_therm > tory_therm & !missing(labour_therm) & party_id == .
	//tory id = 1 if rated conservative party higher and did not vote for labour
replace party_id = 2 if tory_therm > labour_therm & !missing(tory_therm) & party_id == .
	//not aligned: equal ratings; as before this also covers both ratings missing
replace party_id = 3 if tory_therm == labour_therm & party_id ==.
	//missing: no thermometer ratings and no vote answer at all
replace party_id = . if tory_therm ==. & labour_therm ==. & vote ==.

* Ideology 

* ideology was already converted when ideology_1 was destrung above, so this
* call leaves it unchanged; it is kept as a harmless safeguard
destring ideology, replace force

* Three-category version; values outside 0-10 become missing.
* NOTE(review): low values are labelled Right and high values Left here -
* confirm the direction of the scale against the questionnaire.
recode ideology ///
	(0/4  = 1 "Right") ///
	(5    = 2 "Moderate") ///
	(6/10 = 3 "Left") ///
	(else = .), generate(right_left)

* Stereotypes

* Convert the eight stereotype items to numeric
destring st_vegan st_vegetarian st_white st_black ///
         st_progressive st_trad st_degree st_nodegree, replace force

* One shared answer scale, attached to every variable whose name starts with st_
label define stereotype ///
	1 "Conservatives" ///
	2 "Labour" ///
	3 "Neither"
label values st_* stereotype

********************************************************************************
* Store the cleaned respondent-level (wide) file
save "DATA/cjoint_wide.dta", replace
********************************************************************************

********************************************************************************
**# Table 1
********************************************************************************
* Build the dummy variables behind Table 1 and export them for R.
* Everything happens between preserve and restore, so the data in memory
* are unchanged once this section has run.
preserve

* Gender: keep the three substantive answers, then one dummy per category
recode gender ///
	(1 = 1 "Male") ///
	(2 = 2 "Female") ///
	(3 = 3 "Non-Binary") ///
	(else = .), generate(gender_recoded)

tabulate gender_recoded, generate(gender)
rename (gender1 gender2 gender3) (Male Female NonBinary)

* Relationship (Single and Relationship are created but not part of the export below)
tabulate rel_short, generate(rel)
rename (rel1 rel2) (Single Relationship)

* Education
tabulate degree, generate(degree)
rename (degree1 degree2) (no_degree with_degree)

* Ethnicity: drop the 99 answers first so that they do not get a dummy
replace ethnic = . if ethnic == 99
tabulate ethnic, generate(ethnic_short)

* Diet
tabulate diet_short, generate(diet_short)

* Party
tabulate party_id, generate(party_id)

* Ideology
tabulate right_left, generate(right_left)

* Export to create the R table.
* Note that the 0/1 variable degree is exported, not the dummy with_degree.
keep Male Female NonBinary ///
     age ///
     no_degree degree ///
     ethnic_short1 ethnic_short2 ethnic_short3 ethnic_short4 ethnic_short5 ///
     diet_short1 diet_short2 ///
     party_id1 party_id2 party_id3 ///
     right_left1 right_left2 right_left3

save "DATA/for_R/summary_stats.dta", replace
restore

********************************************************************************
**# Figure 7
********************************************************************************
* Build the long stereotype file behind Figure 7 and export it for R.
* Everything happens between preserve and restore, so the data in memory
* are unchanged once this section has run.
preserve

* Number the eight stereotype items 1-8 in the order listed (stereo1 ... stereo8)
* and stack them: one row per respondent and attribute, item number in _j
generate id = _n
rename (st_progressive st_vegan st_vegetarian st_black ///
        st_nodegree st_degree st_trad st_white) (stereo#), addnumber(1)
reshape long stereo, i(id)

label define _j ///
	1 "Progressive" ///
	2 "Vegan" ///
	3 "Vegetarian" ///
	4 "Black" ///
	5 "No degree" ///
	6 "Degree" ///
	7 "Traditional" ///
	8 "White"
label values _j _j

* Reorder the answer codes: source 1, 2, 3 become Tories = 3, Labour = 1, Neither = 2
recode stereo ///
	(1 = 3 "Tories") ///
	(2 = 1 "Labour") ///
	(3 = 2 "Neither") ///
	(else = .), generate(stereotypes)

keep stereotypes _j
rename _j attribute

save "DATA/for_R/stereotypes.dta", replace
restore






